# Install the packages this script attaches below, but only the ones that are
# not already installed, so re-running the script does not reinstall them.
# CRAN package names are case-sensitive ("SnowballC", not "snowballc"), and
# "methods" ships with base R, so it is not requested from CRAN.
required_pkgs <- c("quanteda", "readtext", "SnowballC", "tm", "tidytext",
                   "tidyr")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# clear: remove every object from the current (global) environment so the
# script starts from an empty workspace
rm(list=ls())
# packages (each library() call attaches the package ahead of the ones
# attached before it, so the order below decides which functions mask which)
library(quanteda)
library(readtext)
library(SnowballC)
library(tm)
library(tidytext)
library(tidyr)
library(methods)
# import data: load() restores the objects saved in this .RData file into the
# workspace. Presumably this is where leak_cov_corpus and release_cov_corpus
# come from -- TODO confirm against the file's contents.
load("CaPe_ISQ_leaks.RData")
# set seed so that later random sampling is reproducible
set.seed(12345)
# Install twitteR and wordcloud *before* attaching them, so that library()
# does not fail on a machine where they are not installed yet.
extra_pkgs <- c("twitteR", "wordcloud")
missing_extra_pkgs <- setdiff(extra_pkgs, rownames(installed.packages()))
if (length(missing_extra_pkgs) > 0) {
  install.packages(missing_extra_pkgs)
}
# detach conflicting packages, if present (detach() errors when the package is
# not on the search path, hence the guard)
if ("package:quanteda" %in% search()) {
  detach("package:quanteda", unload=TRUE)
}
# use required packages; plyr is attached before dplyr, and quanteda is
# attached last, which places it ahead of all the others on the search path
library(lubridate)
library(stringr)
library(tidytext)
library(plyr)
library(dplyr)
library(broom)
library(scales)
library(twitteR)
library(wordcloud)
library(reshape2)
library(quanteda)
## Programmes to get sentiment on sampled 30 sentence-long articles, with bootstraps
# leak programme
# Bootstrap the net Bing sentiment of synthetic articles built from `document`.
#
# The corpus is split into sentences once. Each of the `nboot` resamples draws
# `sample_size` sentences with replacement, pastes them into a single text and
# scores it as (number of Bing-positive tokens) - (number of Bing-negative
# tokens).
#
# Arguments:
#   document    quanteda corpus to resample; defaults to leak_cov_corpus[1].
#   nboot       number of bootstrap resamples (default 200).
#   sample_size number of sentences drawn per resample (default 30).
# Returns:
#   numeric vector of length `nboot`, one net sentiment score per resample.
#
# NOTE(review): texts() and dfm() on a character vector are deprecated in
# quanteda >= 3; they are kept here to match the rest of the script -- confirm
# the installed quanteda version before upgrading.
boot_leak_hist <- function(document = leak_cov_corpus[1], nboot = 200,
                           sample_size = 30) {
  # Sentence splitting and the lexicon lookup do not depend on the resample,
  # so they are done once instead of on every iteration.
  leak_sentence_vector <- texts(corpus_reshape(document, "sentences"))
  if (length(leak_sentence_vector) == 0) {
    stop("`document` contains no sentences to resample", call. = FALSE)
  }
  bing_lexicon <- get_sentiments("bing")

  sentiment_list <- numeric(nboot)
  # seq_len() yields an empty loop for nboot = 0, where 1:nboot would not.
  for (i in seq_len(nboot)) {
    leak_sample <- paste(
      sample(leak_sentence_vector, size = sample_size, replace = TRUE),
      collapse = " "
    )
    samp_sentiments <- tidy(dfm(leak_sample)) %>%
      inner_join(bing_lexicon, by = c(term = "word"))
    # Sum each polarity directly, so a resample with no positive (or no
    # negative) matches scores that side as 0 rather than failing on a
    # missing column.
    is_positive <- samp_sentiments$sentiment == "positive"
    is_negative <- samp_sentiments$sentiment == "negative"
    sentiment_list[i] <- sum(samp_sentiments$count[is_positive]) -
      sum(samp_sentiments$count[is_negative])
    cat("done", i, "of", nboot, "resamples\n")
  }
  sentiment_list
}
# official release programme
# Bootstrap the net Bing sentiment of synthetic articles built from `document`.
#
# The corpus is split into sentences once. Each of the `nboot` resamples draws
# `sample_size` sentences with replacement, pastes them into a single text and
# scores it as (number of Bing-positive tokens) - (number of Bing-negative
# tokens).
#
# Arguments:
#   document    quanteda corpus to resample; defaults to release_cov_corpus[1].
#   nboot       number of bootstrap resamples (default 200).
#   sample_size number of sentences drawn per resample (default 30).
# Returns:
#   numeric vector of length `nboot`, one net sentiment score per resample.
#
# NOTE(review): texts() and dfm() on a character vector are deprecated in
# quanteda >= 3; they are kept here to match the rest of the script -- confirm
# the installed quanteda version before upgrading.
boot_release_hist <- function(document = release_cov_corpus[1], nboot = 200,
                              sample_size = 30) {
  # Sentence splitting and the lexicon lookup do not depend on the resample,
  # so they are done once instead of on every iteration.
  release_sentence_vector <- texts(corpus_reshape(document, "sentences"))
  if (length(release_sentence_vector) == 0) {
    stop("`document` contains no sentences to resample", call. = FALSE)
  }
  bing_lexicon <- get_sentiments("bing")

  sentiment_list <- numeric(nboot)
  # seq_len() yields an empty loop for nboot = 0, where 1:nboot would not.
  for (i in seq_len(nboot)) {
    release_sample <- paste(
      sample(release_sentence_vector, size = sample_size, replace = TRUE),
      collapse = " "
    )
    samp_sentiments <- tidy(dfm(release_sample)) %>%
      inner_join(bing_lexicon, by = c(term = "word"))
    # Sum each polarity directly, so a resample with no positive (or no
    # negative) matches scores that side as 0 rather than failing on a
    # missing column.
    is_positive <- samp_sentiments$sentiment == "positive"
    is_negative <- samp_sentiments$sentiment == "negative"
    sentiment_list[i] <- sum(samp_sentiments$count[is_positive]) -
      sum(samp_sentiments$count[is_negative])
    cat("done", i, "of", nboot, "resamples\n")
  }
  sentiment_list
}
## results of sentiment analysis
# leak coverage: bootstrap the scores, then summarise them as
# (2.5% quantile, mean, 97.5% quantile)
set.seed(12345)
boot_leak_sentiment_raw <- boot_leak_hist(leak_cov_corpus)
leak_lower_sentiment <- quantile(boot_leak_sentiment_raw, probs = 0.025)
leak_mean_sentiment <- mean(boot_leak_sentiment_raw)
leak_upper_sentiment <- quantile(boot_leak_sentiment_raw, probs = 0.975)
leak_sentiment_CIs <- c(
  leak_lower_sentiment,
  leak_mean_sentiment,
  leak_upper_sentiment
)
# release coverage: same seed and same summary as for the leak coverage
set.seed(12345)
boot_release_sentiment_raw <- boot_release_hist(release_cov_corpus)
hist(boot_release_sentiment_raw)
release_lower_sentiment <- quantile(boot_release_sentiment_raw, probs = 0.025)
release_mean_sentiment <- mean(boot_release_sentiment_raw)
release_upper_sentiment <- quantile(boot_release_sentiment_raw, probs = 0.975)
release_sentiment_CIs <- c(
  release_lower_sentiment,
  release_mean_sentiment,
  release_upper_sentiment
)
# confidence intervals
leak_sentiment_CIs
release_sentiment_CIs
# plot: density histogram of the bootstrapped leak sentiment, written to
# cov_leak_hist.pdf
library(ggplot2)
sentiment_data <- data.frame(cbind(boot_leak_sentiment_raw, boot_release_sentiment_raw))
colnames(sentiment_data) <- c("leak sentiment", "official release sentiment")
leakbarfill <- 'grey50'
leakbarline <- 'grey25'
# The column is referenced by name inside aes() rather than through
# `sentiment_data$...`, which ggplot2 discourages. `breaks` alone fixes the
# bins (it overrides `binwidth`), so `binwidth` is not passed.
# NOTE(review): `..density..` and `size` for lines are deprecated in recent
# ggplot2 releases; they are kept for compatibility with older installs.
# NOTE(review): with these y limits any bar taller than 0.08 is dropped rather
# than clipped -- confirm the limit suits the data.
leak_plot <- ggplot(data = sentiment_data, aes(x = `leak sentiment`)) +
  geom_histogram(aes(y = ..density..),
                 breaks = seq(-20, 30, by = 2),
                 col = leakbarline,
                 fill = leakbarfill,
                 alpha = .75) +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  geom_density(col = "black") +
  labs(title = "Probability density plot for leak sentiment") +
  labs(x = "Sentiment of sample text", y = "Probability density") +
  geom_vline(xintercept = 0, size = 1, colour = "black",
             linetype = "dashed") +
  scale_y_continuous(limits = c(0, 0.08))
# print() renders the plot explicitly, so the PDF is also filled when the
# script is run through source(), where top-level values are not auto-printed.
pdf('cov_leak_hist.pdf')
print(leak_plot)
dev.off()
# Density histogram of the bootstrapped official-release sentiment, written to
# cov_release_hist.pdf
relbarfill <- "grey75"
relbarlines <- "grey25"
# The column is referenced by name inside aes() rather than through
# `sentiment_data$...`, which ggplot2 discourages. `ylim` is not a
# geom_histogram() parameter (it was ignored with a warning) and `breaks`
# overrides `binwidth`, so neither is passed.
# NOTE(review): `..density..` and `size` for lines are deprecated in recent
# ggplot2 releases; they are kept for compatibility with older installs.
# NOTE(review): with these y limits any bar taller than 0.08 is dropped rather
# than clipped -- confirm the limit suits the data.
release_plot <- ggplot(data = sentiment_data,
                       aes(x = `official release sentiment`)) +
  geom_histogram(aes(y = ..density..),
                 breaks = seq(-20, 30, by = 2),
                 col = relbarlines,
                 fill = relbarfill,
                 alpha = .75) +
  theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  geom_density(col = "black") +
  labs(title = "Probability density plot for official release sentiment") +
  labs(x = "Sentiment of sample text", y = "Probability density") +
  geom_vline(xintercept = 0, size = 1, colour = "black",
             linetype = "dashed") +
  scale_y_continuous(limits = c(0, 0.08))
# print() renders the plot explicitly, so the PDF is also filled when the
# script is run through source(), where top-level values are not auto-printed.
pdf('cov_release_hist.pdf')
print(release_plot)
dev.off()
# t-test to confirm difference in sentiment
# (two-sample test with t.test() defaults: x = release scores, y = leak scores)
# NOTE(review): both inputs are bootstrap draws, so the sample size here is the
# number of resamples rather than the number of articles -- confirm this is the
# intended inference.
ttable <- t.test(
  x = boot_release_sentiment_raw,
  y = boot_leak_sentiment_raw
)
ttable
#### ENDS
